## ----setseedch7, echo = FALSE--------------------------------------------
set.seed(1982)

knitr::opts_chunk$set(
                      echo = FALSE,
                      results = "hide",
                      warning = FALSE,
                      message = FALSE,
                      error = FALSE
)


## ----loadlibsch7---------------------------------------------------------
library(ggplot2)
library(scales)
library(dplyr)
library(reshape2)
library(pander)
library(lme4)
require(MASS)
library(rstanarm)
library(Amelia)
library(gridExtra)
source("common_funcs.R")



## ----loaddatach7---------------------------------------------------------
load("ch2_data.RData")


## ----mungech7------------------------------------------------------------
dat <- list()
for (i in 1:11) {
    dat[[i]] <- sheet1[,c("NEUTRAL", "CASEID","HANDDOWN","HEARINGDATE1",
                          paste0("JUDGE", i),
                          paste0("JUDGE", i, "DISS"),
                          paste0("JUDGE", i, "BEGINPG"),
                          paste0("JUDGE", i, "ENDPG"))]	
    names(dat[[i]]) <- c("NEUTRAL", "CASEID",
                         "HANDDOWN", "HEARINGDATE",
                         "JUDGE", "DISS",
                         "BEGINPG", "ENDPG")
}
dat <- do.call("rbind", dat)
dat <- subset(dat, !is.na(JUDGE))
dat$Dissented <- ifelse(dat$DISS %in% c("A", "B"), 1, 0)

sheet1$nDaysHearing <- apply(sheet1[,grep("HEARINGDATE", names(sheet1))],
                             1,
                             function(x) sum(!is.na(x)))

other.vars <- c("Importance", "OpinionBelow", "nDissents.appord",
                "nJudges", "Area",
                "HR", "RESPINTER", "APPINTER",
                "GENINTER", "nDaysHearing")

old <- nrow(dat)
dat <- merge(dat, sheet1[,c("NEUTRAL", "CASEID", other.vars)],
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

spec.m <- melt(spec, id.var  = c("Judge", "Name"),
               value.name = "Specialisation")
spec.m$variable <- as.character(spec.m$variable)
spec.m$variable[which(spec.m$variable == "Tax.and.Chancery")] <- "Chancery"
spec.m$variable[which(spec.m$variable == "NI")] <- "N Ireland"
spec.m$Area <- spec.m$variable
spec.m$variable <- NULL

old <- nrow(dat)
dat <- merge(dat, spec.m,
             by.x = c("JUDGE","Area"),
             by.y = c("Judge","Area"),
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

### Workload
old <- nrow(dat)
dat <- merge(dat, individualWorkload,
             by.x = c("JUDGE", "HANDDOWN"),
             by.y = c("Judge", "Date"),
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

### Temporary
roles <- read.csv("./data/office_holders.csv")
roles$Start <- as.Date(roles$Start)
roles$End <- as.Date(roles$End)

roles <- subset(roles, Role == "Member")
dat <- merge(dat, roles,
             by.x = "JUDGE",
             by.y ="Judge",
             all.x = TRUE,
             all.y = FALSE)

### Temporary
dat$Temporary <- is.na(dat$Start) |
    (!is.na(dat$Start) & dat$Start > dat$HANDDOWN) |
    (!is.na(dat$Start) & dat$End < dat$HANDDOWN)
### Freshman
dat$DaysExperience <- dat$HEARINGDATE - dat$Start
dat$DaysExperience[which(dat$DaysExperience < 0)] <- 0
dat$DaysExperience[which(dat$HEARINGDATE > dat$End)] <-
    dat$End[which(dat$HEARINGDATE > dat$End)] -
    dat$Start[which(dat$HEARINGDATE > dat$End)]
dat$Seniority <- dat$DaysExperience / 365.25

### Presiding
roles <- read.csv("./data/office_holders.csv")
roles$Start <- as.Date(roles$Start)
roles$End <- as.Date(roles$End)
dat$Role <- as.character(dat$Role)
roles$Role <- as.character(roles$Role)
for (i in which(roles$Role != "Member")) {
    matchpos <- which(dat$JUDGE == roles$Judge[i] &
                      dat$HANDDOWN > roles$Start[i] &
                      dat$HANDDOWN < roles$End[i])
    dat$Role[matchpos] <- roles$Role[i]
}

### Panel size
dat <- dat %>%
    group_by(NEUTRAL, CASEID) %>%
    mutate(panelSize = n())

### nIntervenors
intervars <- c("APPINTER","RESPINTER","GENINTER")
dat$nIntervenors <- apply(dat[,intervars],
                          1,
                          function(x) sum(x == "Yes",
                                          na.rm = TRUE))


## ----panelagreement------------------------------------------------------
get_agreement <- function(df) {
    holder <- list()
    for (i in 1:11) {
	holder[[i]] <- df[,c("NEUTRAL", "CASEID","HANDDOWN",
                             paste0("JUDGE",i),
                             paste0("JUDGE",i,"DISS"))]	
	names(holder[[i]]) <- c("NEUTRAL", "CASEID",
                                "HANDDOWN","JUDGE","DISS")
    }
    holder <- do.call("rbind", holder)
    holder <- subset(holder, !is.na(JUDGE))
    holder$DISS <- ifelse(holder$DISS == "C", 1, 0)

    holder <- holder %>%
        group_by(NEUTRAL) %>%
        mutate(w8 = 1 / length(unique(CASEID)))

### Create matrix
    votemat <- acast(holder, NEUTRAL + CASEID + HANDDOWN ~ JUDGE, 
                     value.var = "DISS")
    votemat[is.na(votemat)] <- 0

    holder$value <- 1
    presencemat <- acast(holder, NEUTRAL + CASEID + HANDDOWN ~ JUDGE, 
                         value.var = "value")
    presencemat[is.na(presencemat)] <- 0

### Create weights
    w8 <- apply(acast(holder, NEUTRAL + CASEID + HANDDOWN ~ JUDGE, 
                      value.var = "w8"),
                1,
                function(x) unique(na.omit(x)))

### Sort the first two columns of a dataframe
### and remove duplicates
    dedupe <- function(d) {
        pasteorder <- apply(d[,1:2], 1,
                            function(x) paste0(sort(x), collapse = "/"))
### Take the second one of the two
        d <- d[which(duplicated(pasteorder)),]
        d <- d[which(d[,1] != d[,2]),]
        return(d)
    }
    res <- list()
    for (w in unique(holder$w8)) {
        v <- votemat[which(w8 == w),]
        p <- presencemat[which(w8 == w),]
        agree <- crossprod(v, v)
        sat <- crossprod(p, p)
        agree <- melt(agree,
                      varnames = c("judge1","judge2"),
                      value.name = "agree")
        sat <- melt(sat,
                    varnames = c("judge1","judge2"),
                    value.name = "sat")
        agree <- dedupe(agree)
        sat <- dedupe(sat)
        pos <- which(unique(holder$w8) == w)
        res[[pos]] <- merge(agree, sat, all = T)
        res[[pos]]$agree <- res[[pos]]$agree * w
        res[[pos]]$sat <- res[[pos]]$sat * w
    }
    res <- do.call("rbind", res)
    res <- res %>%
        group_by(judge1, judge2) %>%
        summarize(agree = sum(agree),
                  sat = sum(sat))
    res$prop <- res$agree / res$sat
    return(res)
}

if (!file.exists("cache/rolling_agreement.rds")) {
    
    holder <- list()
    for (d in unique(as.character(sheet1$HEARINGDATE1))) {
        d <- as.Date(d)
        votes_until_then <- subset(sheet1, HEARINGDATE1 < d)
        if (nrow(votes_until_then) > 0) {
            agreement <- get_agreement(votes_until_then)
            class(agreement) <- "data.frame"
            holder[[as.character(d)]] <- agreement
        }
        
    }
    saveRDS(holder, file = "cache/rolling_agreement.rds")
} else {
    holder <- readRDS("cache/rolling_agreement.rds")
}

### Now, for each panel, get the SD
panel_agree_sd <- unique(dat[,c("HEARINGDATE", "NEUTRAL")])
panel_agree_sd$ag_sd <- NA
panel_agree_sd$ag_mean <- NA

for (i in 1:nrow(panel_agree_sd)) {
    pos <- which(dat$NEUTRAL == panel_agree_sd$NEUTRAL[i])
    judges <- unique(dat$JUDGE[pos])
    if (!is.na(panel_agree_sd$HEARINGDATE[i])) { 
    the_agree <- which.min(abs(as.Date(names(holder)) - panel_agree_sd$HEARINGDATE[i]))
    the_agree <- holder[[the_agree]]
    the_agree <- subset(the_agree, judge1 %in% judges & judge2 %in% judges)
    panel_agree_sd$ag_sd[i] <- sd(the_agree$prop, na.rm = TRUE)
    panel_agree_sd$ag_mean[i] <- mean(the_agree$prop, na.rm = TRUE)
    }
}
### Merge back
dat <- merge(dat, panel_agree_sd, all.x = TRUE, all.y = FALSE)



## ----judexpch7, eval = TRUE----------------------------------------------
### Rather awkward stuff here
### See http://stackoverflow.com/questions/15698399/cumulative-count-of-unique-values-in-r
### for the background
dat <- dat %>%
    group_by(JUDGE) %>%
    arrange(HANDDOWN, NEUTRAL) %>%
    mutate(experience = cummax(as.numeric(factor(as.character(NEUTRAL), levels = unique(as.character(NEUTRAL))))))



## ----conjoinedch7--------------------------------------------------------
conj <- dat %>%
    group_by(NEUTRAL) %>%
    summarize(nCases = length(unique(CASEID)))
conj2 <- dat %>%
    group_by(NEUTRAL, JUDGE) %>%
    summarize(SomeNotAll = length(unique(DISS)) > 1)



## ----dissentbycase, results = "asis"-------------------------------------
plot.df <- dat %>%
    group_by(NEUTRAL, CASEID) %>%
    summarize(nJudges = length(unique(JUDGE)),
              nDissents = sum(Dissented))
outtab <- with(plot.df, table(nJudges, nDissents))

### Punch out occasions where nDissents >= nJudges / 2
## kill <- outer(as.numeric(rownames(outtab)), as.numeric(colnames(outtab)), function(x,y) y >= (x/2))
## outtab[kill] <- NA

rownames(outtab) <- paste0(rownames(outtab), " judges")
outtab <- rbind(outtab, colSums(outtab, na.rm = TRUE))
outtab <- cbind(outtab, rowSums(outtab, na.rm = TRUE))
emphasize.italics.cols(ncol(outtab))
emphasize.italics.rows(nrow(outtab))
pandoc.table(outtab,
             caption = "Cases featuring dissents by number of dissenting opinions (columns) against number of judges hearing the case (rows). Two cases (UKSC 2014/0219 and UKSC 2015/0218) were heard by 10 judges because Lord Hodge substituted for Lord Toulson. One case has a majority of dissenters because partial dissents were counted as full dissents. ",
      include.rownames = TRUE,
      include.colnames = TRUE,
      missing = '')
num1 <- outtab[nrow(outtab),"0"]
num2 <- outtab[nrow(outtab),ncol(outtab)]
num3 <- round(100 * num1 / num2, 0)


weight.df <- dat %>%
    group_by(NEUTRAL) %>%
    summarize(Weight = 1 / length(unique(CASEID)))

old <- nrow(plot.df)
plot.df <- merge(plot.df, weight.df,
                 by = "NEUTRAL",
                 all.x = TRUE,
                 all.y = FALSE)
new <- nrow(plot.df)
num4 <- sum((plot.df$nDissents == 0) * plot.df$Weight)
num5 <- sum(plot.df$Weight)
num6 <- round(100 * num4 / num5, 0)
### Weighted 


## ----dissentbyjudge, fig = TRUE, fig.cap = "Rates of dissent per judge. Solid vertical line indicates average across all judges. Plotted points are scaled in proportion to the number of cases heard by each judge. ", fig.width = 4, fig.height = 6----
plot.df <- dat %>%
    group_by(NEUTRAL) %>%
    mutate(weight = 1 / length(unique(CASEID))) %>% 
    group_by(JUDGE) %>%
    summarize(nCases = sum(weight),
              nDissents = sum(Dissented * weight))

plot.df$Rate <- plot.df$nDissents  / plot.df$nCases
plot.df <- plot.df[order(plot.df$Rate),]
plot.df <- subset(plot.df, JUDGE %in% 1:20)
plot.df$JudgeName <- num2judge(plot.df$JUDGE)
plot.df$JudgeName <- factor(plot.df$JudgeName,
                            levels = plot.df$JudgeName,
                            ordered = TRUE)
plot.df$custom.cex <- cscale(plot.df$nCases, area_pal()) / 2

ggplot(plot.df, aes(x = JudgeName, y = Rate, size = nCases)) +
    geom_hline(yintercept = mean(plot.df$Rate), colour = "darkgrey") + 
    geom_point() +
    scale_x_discrete("") +
    scale_y_continuous("Rate\n(Dissents divided by number of cases)") +
    scale_size_continuous(guide = "none") + 
    coord_flip() +
    theme_uksc()



## ----modeldataframe------------------------------------------------------
dat.bak <- dat
old <- nrow(dat)
dat <- merge(dat, weight.df,
             all.x = TRUE,
             all.y = FALSE)
## dat <- subset(dat, !is.na(Area))
new <- nrow(dat)
stopifnot(old == new)

dat$ag_sd[is.na(dat$ag_sd)] <- 0

### Create mean Specialisation and workload for missing judges
dat <- dat %>%
    group_by(Area, NEUTRAL, CASEID) %>%
    mutate(Specialisation2 = ifelse(is.na(Specialisation),
                                    mean(Specialisation, na.rm = TRUE),
                                    Specialisation),
           workloadTotal2 = ifelse(is.na(workloadTotal),
                                  mean(workloadTotal, na.rm = TRUE),
                                  workloadTotal),
           panelWorkload = (sum(workloadTotal2, na.rm = TRUE) - workloadTotal2) / (n() - 1),
           panelSpecialisation = (sum(Specialisation2, na.rm = TRUE) - Specialisation2) / (n() - 1),
           panelSpecialisationSD = sd(Specialisation2, na.rm = TRUE),
           panelExperience = (sum(experience, na.rm = TRUE) - experience) / (n() - 1),
           nJudges = n())

### Get the presiding judge
dat <- dat %>%
    group_by(NEUTRAL) %>%
    mutate(Presiding = ifelse(any(Role == "President"),
                              JUDGE[which(Role == "President")[1]],
                                    ifelse(any(Role == "Vice-President"),
                                    JUDGE[which(Role == "Vice-President")[1]],
                                    JUDGE[which.max(experience)])))

           
dat <- dat %>%
    group_by(Area) %>%
    mutate(OpinionBelow = ifelse(is.na(OpinionBelow),
                                 mean(OpinionBelow,na.rm = TRUE),
                                 OpinionBelow))
dat$JUDGE <- factor(dat$JUDGE)


### Lagged dissent behaviour?
library(zoo)
windowSize <- 10
aux <- dat %>%
    distinct(JUDGE, NEUTRAL, HANDDOWN, Dissented) %>%
    group_by(JUDGE, NEUTRAL, HANDDOWN) %>%
    summarize(Dissented = mean(Dissented, na.rm = TRUE)) %>% 
    group_by(JUDGE) %>%
    filter(n() > windowSize) %>%
    arrange(HANDDOWN) %>%
    mutate(rollDissent = rollsum(Dissented, k = windowSize, na.pad = TRUE, align = "right"))

### However, this will get us the rolling function including the present
aux$rollDissent <- (aux$rollDissent - aux$Dissented) / windowSize

aux$rollDissent[is.na(aux$rollDissent)] <- 0
aux$Dissented <- NULL
dat <- merge(dat, aux,
             all.x = TRUE,
             all.y = FALSE)

legal.f <- Dissented ~ OpinionBelow + Importance +
    Specialisation * panelSpecialisation + Area +
    (1|JUDGE) + (1|NEUTRAL)

legal.alt <- Dissented ~ OpinionBelow + Importance +
    Specialisation + panelSpecialisation + Area +
    (1|JUDGE) + (1|NEUTRAL)

org.f <- Dissented ~ Role + 
    workloadTotal + panelWorkload +
    nJudges + Seniority + 
    (1|JUDGE) + (1|NEUTRAL)

political.f <- Dissented ~ ag_mean + HR + Area +
        (1|JUDGE) + (1|NEUTRAL)

comb.f <- Dissented ~ OpinionBelow + Importance +
    Specialisation * panelSpecialisation + Area +
    Role + 
    workloadTotal + panelWorkload +
    nJudges + ag_mean + HR + (1|JUDGE) + (1|NEUTRAL)

comb.alt <- Dissented ~ OpinionBelow + Importance +
    Specialisation + panelSpecialisation + Area +
    Role + Seniority + 
    workloadTotal + panelWorkload +
    nJudges + ag_mean + HR + (1|JUDGE) + (1|NEUTRAL)



## ----rescaleimputation---------------------------------------------------
dat$HR <- as.numeric(dat$HR == "Yes")
vars <- c("OpinionBelow",
          "Importance",
          "Specialisation",
          "panelSpecialisation",
          "workloadTotal",
          "panelWorkload",
          "Seniority",
          "nJudges",
          "ag_mean")
            
means_std_vars <- numeric()
sd_std_vars <- numeric()
for (v in vars) {
    means_std_vars <- c(means_std_vars,
                        mean(dat[[v]], na.rm = TRUE))
    sd_std_vars <- c(sd_std_vars,
                        sd(dat[[v]], na.rm = TRUE))
    dat[,v] <- arm::rescale(dat[[v]])
}

names(means_std_vars) <- names(sd_std_vars) <- vars

keep.vars <- c("Dissented",
               "OpinionBelow",
               "Importance",
               "Role",
               "Specialisation",
               "panelSpecialisation",
               "Area",
               "Seniority",
               "workloadTotal",
               "panelWorkload",
               "nJudges",
               "ag_mean",
               "HR",
               "JUDGE",
               "NEUTRAL",
               "CASEID")

tmp <- amelia(dat[, keep.vars], m = 1,
              boot.type = "none",
              idvars = c("CASEID", "NEUTRAL"),
              noms = c("Area", "Role", "JUDGE"))

model.df <- tmp$imputations[[1]]

model.df$Area <- relevel(model.df$Area,
                         "Civil")

nVotes <- nrow(model.df)
nCases <- with(model.df,
               length(unique(interaction(CASEID, NEUTRAL))))



## ----estimation----------------------------------------------------------
if (!file.exists("cache/07_cached_results.RData")) {
    legal.mod <- glmer(legal.f,
                   data = model.df, # subset(dat, nJudges != 3),
                   family = binomial,
                   glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))

    legalmod.rstanarm <- stan_glmer(legal.f,
                                    data = model.df,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)
    
    legal.mod_alt <- glmer(legal.alt,
                       data = model.df, # subset(dat, nJudges != 3),
                       family = binomial,
                       glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))

    legalmod.rstanarm_alt <- stan_glmer(legal.alt,
                                    data = model.df,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)

    
    org.mod <- glmer(org.f,
                     data = model.df, # subset(dat, nJudges != 3),
                     family = binomial,
                     glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))

    orgmod.rstanarm <- stan_glmer(org.f,
                                    data = model.df,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)

    political.mod <- glmer(political.f,
                           data = model.df, # subset(dat, nJudges != 3),
                           family = binomial,
                           glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))

    polmod.rstanarm <- stan_glmer(political.f,
                                    data = model.df,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)

    
    comb.mod <-  glmer(comb.f,
                       data = model.df, # subset(dat, nJudges != 3),
                       family = binomial,
                       glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 500000)))

    comb.mod_alt <-  glmer(comb.alt,
                       data = model.df, # subset(dat, nJudges != 3),
                       family = binomial,
                       glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))

    combmod.rstanarm <- stan_glmer(comb.f,
                                    data = model.df,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)

    combmod.rstanarm_alt <- stan_glmer(comb.alt,
                                    data = model.df,
                                    family = binomial,
                                    chains = 2, cores = 2, seed = 1982)

    save(legal.mod, legal.mod_alt,
         org.mod, political.mod,
         comb.mod, comb.mod_alt,
         legalmod.rstanarm, legalmod.rstanarm_alt, orgmod.rstanarm,
         polmod.rstanarm, combmod.rstanarm, combmod.rstanarm_alt,
         file = "cache/07_cached_results.RData")

} else {
    load("cache/07_cached_results.RData")
}



## ----plotlegal, fig = TRUE, fig.cap = "Model of the probability of dissent, legal models.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 4, fig.height = 6----
mycoefplot(legalmod.rstanarm,
         variable.labels = list('Importance'='Importance',
                                'OpinionBelow'='Opinion below',
                                'Specialisation'='Specialisation, individual',
                                'panelSpecialisation'='Specialisation, collective',
                                'Specialisation:panelSpecialisation'='Individual by collective specialisation',
                                'AreaCriminal'='Criminal law',
                                'AreaFamily'='Family law',
                                'AreaN Ireland'='N Ireland',
                                'AreaPublic'='Public',
                                'AreaScots'='Scots',
                                'AreaChancery'='Tax and Chancery',
                                'panelWorkload'='Panel workload',
                                'workloadTotal'='Court workload',
                                'PanelByArea'='Average panel specialism in this area'),
         ylab = "Coefficient value",
         sec.ylab = "Odds of dissenting\nare ... times greater",
         drop = NULL)

s <- summary(legalmod.rstanarm)
scots.coef <- s["AreaScots", "mean"]
scots.lo <- s["AreaScots", "2.5%"]
scots.hi <- s["AreaScots", "97.5%"]

scots.coef <- round(exp(scots.coef), 2) 
scots.int <- c(round(exp(scots.lo), 2),
              round(exp(scots.hi), 2))

impt.coef <- s["Importance", "mean"]
impt.lo <- s["Importance", "2.5%"]
impt.hi <- s["Importance", "97.5%"]

impt.coef <- round(exp(impt.coef), 2) 
impt.int <- c(round(exp(impt.lo), 2),
              round(exp(impt.hi), 2))



## ----plotorg, fig = TRUE, fig.height = 3.5, fig.cap = "Model of the probability of dissent, organisational model.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. "----

mycoefplot(orgmod.rstanarm,
         variable.labels = list('Importance'='Importance',
                                'OpinionBelow'='Opinion below',
                                'Specialisation'='Specialisation, individual',
                                'panelSpecialisation'='Specialisation, collective',
                                'Specialisation:panelSpecialisation'='Individual by collective specialisation',
                                'AreaCriminal'='Criminal law',
                                'AreaFamily'='Family law',
                                'AreaN Ireland'='N Ireland',
                                'AreaPublic'='Public',
                                'AreaScots'='Scots',
                                'AreaChancery'='Tax and Chancery',
                                'panelWorkload'='Panel workload',
                                'workloadTotal'='Individual workload',
                                'PanelByArea'='Average panel specialism in this area',
                                'nJudges'='Number of judges',
                                'FreshmanTRUE'='Judge in first six months',
                                'RoleVice-President'='Deputy President',
                                'RolePresident'='President of the Court'),
         ylab = "Coefficient value",
         sec.ylab = "Odds of dissenting\nare ... times greater",
         drop = NULL)

s <- summary(orgmod.rstanarm)
v <- "nJudges"
nJudges.coef <- s[v, "mean"]
nJudges.lo <- s[v, "2.5%"]
nJudges.hi <- s[v, "97.5%"]

nJudges.coef <- round(exp(nJudges.coef), 2) 
nJudges.int <- c(round(exp(nJudges.lo), 2),
              round(exp(nJudges.hi), 2))

v <- "RolePresident"
president.coef <- s[v, "mean"]
president.lo <- s[v, "2.5%"]
president.hi <- s[v, "97.5%"]

president.coef <- round(exp(president.coef), 2) 
president.int <- c(round(exp(president.lo), 2),
              round(exp(president.hi), 2))



## ----plotpolitical, fig = TRUE, fig.cap = "Model of the probability of dissent, political model.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 7----

mycoefplot(polmod.rstanarm,
         variable.labels = list('Importance'='Importance',
                                'HR'='Case with human rights issues',
                                'ag_mean'='Avg. agreement w/ panel members',
                                'OpinionBelow'='Opinion below',
                                'Specialisation'='Specialisation, individual',
                                'panelSpecialisation'='Specialisation, collective',
                                'Specialisation:panelSpecialisation'='Individual by collective specialisation',
                                'AreaCriminal'='Criminal law',
                                'AreaFamily'='Family law',
                                'AreaN Ireland'='N Ireland',
                                'AreaPublic'='Public',
                                'AreaScots'='Scots',
                                'AreaChancery'='Tax and Chancery',
                                'panelWorkload'='Panel workload',
                                'workloadTotal'='Individual workload',
                                'PanelByArea'='Average panel specialism in this area'),
         ylab = "Coefficient value",
sec.ylab = "Odds of dissenting are\n... times greater",
         drop = NULL)



## ----plotcomb, fig = TRUE, fig.height = 8, fig.width = 6, fig.cap = "Model of the probability of dissent, combined model.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. "----
mycoefplot(combmod.rstanarm,
         variable.labels = list('Importance'='Importance',
                                'HR'='Case with human rights issues',
                                'ag_mean'='Avg. agreement w/ panel members',
                                'OpinionBelow'='Opinion below',
                                'Specialisation'='Specialisation, individual',
                                'panelSpecialisation'='Specialisation, collective',
                                'Specialisation:panelSpecialisation'='Individual by collective specialisation',
                                'AreaCriminal'='Criminal law',
                                'AreaFamily'='Family law',
                                'AreaN Ireland'='N Ireland',
                                'AreaPublic'='Public',
                                'AreaScots'='Scots',
                                'AreaChancery'='Tax and Chancery',
                                'panelWorkload'='Panel workload',
                                'workloadTotal'='Individual workload',
                                'PanelByArea'='Average panel specialism in this area',
                                'nJudges'='Number of judges',
                                'FreshmanTRUE'='Judge in first six months',
                                'RoleVice-President'='Deputy President',
                                'RolePresident'='President of the Court'),
         ylab = "Coefficient value",
sec.ylab = "Odds of dissenting are\n... times greater",
         drop = NULL)



## ----fxplot, fig = TRUE, fig.cap = "Effects of increased specialisation at individual level (left-hand facet) and panel level (right-hand facet) on the probability of dissent", fig.fullwidth = TRUE, fig.width = 9, fig.height = 4.5----
## Base line

bl <- data.frame(OpinionBelow = 0,
                  Importance = 0,
                  Area = "Public",
                  Role = "Member",
                  JUDGE = "6",
                  NEUTRAL = model.df$NEUTRAL[1],
                  workloadTotal = 0,
                  panelWorkload = 0,
                  nJudges = 0,
                  ag_mean = 0,
                  HR = 0)

## Individual specialisation
inseq <- seq(min(model.df$Specialisation),
             max(model.df$Specialisation),
             length.out = 100)

bl <- merge(bl,
            data.frame(Specialisation = inseq),
            all = TRUE)

bl$panelSpecialisation <- 0

plot.df <- data.frame(Specialisation = bl$Specialisation)
plot.df$mean <- plot.df$lo <- plot.df$hi <- NA
plot.df$mean <- plot.df$llo <- plot.df$hhi <- NA

for (i in 1:nrow(plot.df)) {
    preds <- posterior_linpred(combmod.rstanarm,
                               newdata = bl[i, ])[, 1]
    preds <- plogis(preds)
    plot.df$mean[i] <- mean(preds)
    plot.df$lo[i] <- quantile(preds, 1/40)
    plot.df$hi[i] <- quantile(preds, 39/40)
    plot.df$llo[i] <- quantile(preds, 1/12)
    plot.df$hhi[i] <- quantile(preds, 11/12)
}

plot.df$Specialisation <- plot.df$Specialisation *
    2 * sd_std_vars["Specialisation"] +
    means_std_vars["Specialisation"]

rug.df <- dat %>%
    dplyr::select(Specialisation) %>%
    mutate(Specialisation = Specialisation *
               2 * sd_std_vars["Specialisation"] +
               means_std_vars["Specialisation"])
               

## Plot this
p1 <- ggplot(plot.df, aes(x = Specialisation, y = mean)) +
    geom_line() +
    geom_hline(yintercept = 0) + 
    geom_ribbon(aes(ymax = hi, ymin = lo), alpha = 0.35) +
    geom_ribbon(aes(ymax = hhi, ymin = llo), alpha = 0.35) +
    scale_x_continuous("Individual specialisation",
                       labels = scales::percent) +
    scale_y_continuous("Probability of dissent",
                       labels = scales::percent) +
    geom_rug(data = rug.df, mapping = aes(x = Specialisation),
             inherit.aes = FALSE,
             sides = "b",
             alpha = 0.1) +
    geom_rug(data = rug.df %>% summarize(Specialisation = median(Specialisation, na.rm = TRUE)),
             aes(x = Specialisation),
             inherit.aes = FALSE,
             sides = "b",
             size = 1.1,
             color = "red") +
    coord_cartesian(ylim = c(-0.1, 0.4)) + 
    theme_uksc()


bl <- data.frame(OpinionBelow = 0,
                  Importance = 0,
                  Area = "Civil",
                  Role = "Member",
                  JUDGE = "6",
                  NEUTRAL = model.df$NEUTRAL[1],
                  workloadTotal = 0,
                  panelWorkload = 0,
                  nJudges = 0,
                  ag_mean = 0,
                  HR = 0)

## Individual specialisation
inseq <- seq(min(model.df$panelSpecialisation),
             max(model.df$panelSpecialisation),
             length.out = 100)

bl <- merge(bl,
            data.frame(panelSpecialisation = inseq),
            all = TRUE)

bl$Specialisation <- 0

plot.df <- data.frame(panelSpecialisation = bl$panelSpecialisation)
plot.df$mean <- plot.df$lo <- plot.df$hi <- NA
plot.df$mean <- plot.df$llo <- plot.df$hhi <- NA

for (i in 1:nrow(plot.df)) {
    preds <- posterior_linpred(combmod.rstanarm,
                               newdata = bl[i, ])[, 1]
    preds <- plogis(preds)
    plot.df$mean[i] <- mean(preds)
    plot.df$lo[i] <- quantile(preds, 1/40)
    plot.df$hi[i] <- quantile(preds, 39/40)
    plot.df$llo[i] <- quantile(preds, 1/12)
    plot.df$hhi[i] <- quantile(preds, 11/12)
}

plot.df$panelSpecialisation <- plot.df$panelSpecialisation *
    sd_std_vars["panelSpecialisation"] +
    means_std_vars["panelSpecialisation"]

rug.df <- dat %>%
    dplyr::select(panelSpecialisation) %>%
    mutate(panelSpecialisation = panelSpecialisation *
               sd_std_vars["panelSpecialisation"] +
               means_std_vars["panelSpecialisation"])
               

## Plot this
p2 <- ggplot(plot.df, aes(x = panelSpecialisation, y = mean)) +
    geom_line() +
    geom_hline(yintercept = 0) + 
    geom_ribbon(aes(ymax = hi, ymin = lo), alpha = 0.35) +
    geom_ribbon(aes(ymax = hhi, ymin = llo), alpha = 0.35) +
    scale_x_continuous("Panel specialisation",
                       labels = scales::percent) +
    scale_y_continuous("Probability of dissent",

                       labels = scales::percent) +
    geom_rug(data = rug.df, mapping = aes(x = panelSpecialisation),
             inherit.aes = FALSE,
             sides = "b",
             alpha = 0.1) +
    geom_rug(data = rug.df %>% summarize(panelSpecialisation = median(panelSpecialisation, na.rm = TRUE)),
             aes(x = panelSpecialisation),
             inherit.aes = FALSE,
             sides = "b",
             size = 1.1,
             color = "red") +
coord_cartesian(ylim = c(-0.1, 0.4)) + 
    theme_uksc()

grid.arrange(p1, p2, nrow = 1)



## ----interactionplot, fig = TRUE, fig.width = 7, fig.height = 3.5, fig.cap = "Effect of a two standard deviation increase in judge specialisation at different levels of panel specialisation. Light shaded area shows 95 percent credible interval; dark shaded area 90 percent credible interval. Rug on bottom axis shows the distribution of panel specialisation. Red line indicates median panel specialisation. "----

aux <- data.frame(OpinionBelow = 0,
                  Importance = 0,
                  Specialisation = 0,
                  Area = "Civil",
                  Role = "Member",
                  JUDGE = "6",
                  NEUTRAL = model.df$NEUTRAL[1],
                  workloadTotal = 0,
                  panelWorkload = 0,
                  nJudges = 0,
                  ag_mean = 0,
                  HR = 0)

newdata <- merge(aux,
                 with(model.df,
                      data.frame(panelSpecialisation = seq(min(panelSpecialisation),
                                                           max(panelSpecialisation),
                                                           length.out = 100))))

holder <- newdata
holder$mean <-
    holder$lo <-
        holder$hi <-
            holder$llo <-
                holder$hhi <- NA

for (i in 1:nrow(newdata)) {
    mod <- combmod.rstanarm
    w_spec <- newdata[i,]
    wo_spec <- newdata[i,]
    w_spec$Specialisation <- 1
    w_spec <- posterior_linpred(mod,
                      newdata = w_spec)[,1]
    wo_spec <- posterior_linpred(mod,
                                 newdata = wo_spec)[,1]
    delta <- plogis(w_spec) - plogis(wo_spec)
    holder$mean[i] <- mean(delta)
    holder$lo[i] <- quantile(delta, 1/40)
    holder$hi[i] <- quantile(delta, 39/40)
    holder$llo[i] <- quantile(delta, 1/12)
    holder$hhi[i] <- quantile(delta, 11/12)
}


holder$panelSpecialisation <- holder$panelSpecialisation  *
    sd_std_vars["panelSpecialisation"] +
    means_std_vars["panelSpecialisation"]

aux <- dat
aux$panelSpecialisation <- aux$panelSpecialisation  *
    sd_std_vars["panelSpecialisation"] +
    means_std_vars["panelSpecialisation"]

p3 <- ggplot(holder, aes(x = panelSpecialisation, y = mean)) +
    geom_line() +
    geom_hline(yintercept = 0) + 
    geom_ribbon(aes(ymax = hi, ymin = lo), alpha = 0.35) +
    geom_ribbon(aes(ymax = hhi, ymin = llo), alpha = 0.35) +
    scale_x_continuous("Specialisation of the panel\n(0-100%)",
                       labels = scales::percent) +
    scale_y_continuous("Effect of judge specialisation") +
    geom_rug(data = aux, mapping = aes(x = panelSpecialisation),
             inherit.aes = FALSE,
             sides = "b",
             alpha = 0.1) +
    geom_rug(data = aux %>% summarize(panelSpecialisation = median(panelSpecialisation, na.rm = TRUE)),
             aes(x = panelSpecialisation),
             inherit.aes = FALSE,
             sides = "b",
             size = 1.1,
             color = "red") +
    theme_uksc()

print(p3)


## ----fitstatsch7, fig = TRUE, fig.cap = "Goodness-of-fit statistics for different models. Thin lines indicate 95 percent credible intervals; thick lines indicate 83 percent credible intervals. ", warning = FALSE----

epcp.rstanarm <- function(mod) {
    
    poslp <- posterior_linpred(mod)
    sumdf <- as.data.frame(summary(mod)) 
    preds <- (poslp > 0.5)
    
    actual <- matrix(mod$y,
                     nrow = nrow(poslp),
                     ncol = length(mod$y),
                     byrow = TRUE)
    correct <- (preds == actual)

    correct <- apply(correct, 1, mean)
    epcp <- data.frame(term = "PCP\n(bigger is better)",
                       mean = mean(correct),
                       lo = quantile(correct, 0.025),
                       hi = quantile(correct, 0.975),
                       llo = quantile(correct, 1/12),
                       hhi = quantile(correct, 11/12))

    the.loo <- loo(mod)
    the.loo <- data.frame(term = "LOOIC\n(smaller is better)",
                          mean = the.loo$looic,
                          lo = the.loo$looic + qnorm(1/40) * the.loo$se_looic,
                          hi = the.loo$looic + qnorm(39/40) * the.loo$se_looic,
                          llo = the.loo$looic + qnorm(1/24) * the.loo$se_looic,
                          hhi = the.loo$looic + qnorm(23/24) * the.loo$se_looic)
                                        #    return(rbind(epcp, the.loo))
    return(rbind(epcp, the.loo))
}

if (file.exists("cache/07_gof.RData")) {
    load("cache/07_gof.RData")
} else { 
    
    legal.stats <- epcp.rstanarm(legalmod.rstanarm)
    org.stats <- epcp.rstanarm(orgmod.rstanarm)
    pol.stats <- epcp.rstanarm(polmod.rstanarm)
    comb.stats <- epcp.rstanarm(combmod.rstanarm)
    
    legal.stats$Model <- "Legal"
    org.stats$Model <- "Organisational"
    pol.stats$Model <- "Political"
    comb.stats$Model <- "Combined"

    null.stats <- data.frame(Model = "Null",
                             term = "PCP\n(bigger is better)",
                             mean = mean(combmod.rstanarm$y == 0,
                                         na.rm = TRUE))
    
    gof.df <- rbind(
        rbind(
            rbind(legal.stats,
                  org.stats),
            pol.stats),
        comb.stats)
    
    gof.df <- merge(gof.df, null.stats, all = TRUE)
    
    gof.df$Model <- factor(gof.df$Model,
                           levels = rev(c("Null", "Legal",
                                          "Organisational", "Political",
                                          "Combined")),
                           ordered = TRUE)

    legal.loo <- loo(legalmod.rstanarm)
    combined.loo <- loo(combmod.rstanarm)
    political.loo <- loo(polmod.rstanarm)
    org.loo <- loo(orgmod.rstanarm)

    save(list = c("legal.loo", "combined.loo",
                  "political.loo", "org.loo",
                  "gof.df"),
         file = "cache/07_gof.RData")
     legal.loo <- loo(legalmod.rstanarm)

}

my.levels <- gof.df %>%
    filter(grepl("LOOIC", term)) %>%
    arrange(mean) %>%
   pull(Model)

my.levels <- c(as.character(my.levels), "Null")

gof.df$Model <- factor(as.character(gof.df$Model),
levels = rev(my.levels),
ordered = TRUE)

ggplot(gof.df, aes(x = Model, y = mean, ymax = hi, ymin = lo)) +
    geom_linerange(size = inner.bar.size,
                   colour = inner.bar.col,
                   alpha = 0.8) +
        geom_linerange(aes(ymin = llo, ymax = hhi),
                       size = outer.bar.size,
                       colour = outer.bar.col,
                       alpha = 0.8) +
    geom_point(aes(colour = outer.bar.col),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               shape= my.pchs[1],
               stroke = 1.5) +
    scale_colour_identity() + 
    facet_wrap(~term, scales = "free_x") +
    scale_y_continuous("Value") +
    coord_flip() +
    theme_uksc()

combined.pcp <- subset(gof.df,
                       Model == "Combined" & grepl("PCP", term)) %>%
    pull(mean)

combined.pcp <- round(100 * combined.pcp)

